sum_em (suma total de emisiones)# Lectura de datos
datos <- read.csv(file = "https://www.datos.gov.co/api/views/25nw-b8kq/rows.csv?accessType=DOWNLOAD")
# Depuración de datos
library(tidyverse)
library(Hmisc)
datos %>%
select(depto = Departamentos,
total_pred = Total.predios,
total_bov = Total.Bovinos...2017,
area_agri = Area.total.sembrada.del.sector.agricola,
area_fores = Agroforestal..Ha.,
area_cons = ConservaciOn.de.Suelos...Ha.,
area_gan = Ganadera..Ha.,
em_agri = Emisiones.Sector.AgrIcola..Miles.de.ton.CO2.eq.,
em_fores = Emisiones.Sector.Forestal..Miles.de.ton.CO2.eq.,
em_pec = Sector.Pecuario..Miles.de.ton.CO2.eq.) %>%
mutate(depto = capitalize(tolower(depto)),
depto = gsub("Boyaca", "Boyacá", depto, ignore.case = TRUE),
depto = gsub("Distrito capital", "Cundinamarca", depto,
ignore.case = TRUE),
sum_em = em_agri + em_fores + em_pec) ->
datos2
datos2datos2 %>%
select_if(is.numeric) %>%
gather() %>%
ggplot(data = ., aes(x = value)) +
facet_wrap(~key, scales = "free") +
geom_density() +
ggtitle("Escala original")datos2 %>%
select_if(is.numeric) %>%
gather() %>%
ggplot(data = ., aes(x = value)) +
facet_wrap(~key, scales = "free") +
geom_density() +
scale_x_log10() +
ggtitle("Escala logarítmica")library(corrplot)
library(RColorBrewer)
datos2 %>%
select_if(is.numeric) %>%
select(-c(em_agri:em_pec)) %>%
mutate_if(is.numeric, log) %>%
filter_all(all_vars(.>0)) %>%
cor(use = "pairwise.complete.obs") %>%
corrplot(type = "upper", diag = FALSE, tl.srt = 45, tl.col = "black",
method = "pie", addgrid.col = "black",
col = brewer.pal(n = 8, name = "RdBu"))library(jcolors)
datos2 %>%
select(which(sapply(datos2,class) %in% c("numeric", "integer")), depto) %>%
gather(key = "variable", value = "valor", -depto) %>%
mutate(valor = log10(valor)) %>%
ggplot(data = ., aes(x = depto, y = valor, fill = depto)) +
geom_boxplot() +
scale_fill_jcolors(palette = "pal3") +
facet_wrap(facets = ~variable, scales = "free", ncol = 3) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none")graphics)plot(datos2$total_bov, datos2$sum_em, log = "xy")
abline(lm(log10(datos2$sum_em) ~ log10(datos2$total_bov)),
col = "red")ggplot2datos2 %>%
ggplot(., aes(x = total_bov, y = sum_em)) +
geom_point() +
scale_x_log10() +
scale_y_log10() +
geom_smooth(method = "lm", se = FALSE, color = "red")datos2 %>%
ggplot(., aes(x = total_bov, y = sum_em, color = depto)) +
geom_point() +
scale_x_log10() +
scale_y_log10() +
geom_smooth(method = "lm", se = FALSE) +
scale_color_jcolors(palette = "pal3") +
theme(legend.position = "bottom") +
labs(color = "")datos2 %>%
select(-c(em_agri:em_pec)) %>%
gather(key = "variable", value = "valor", -c(sum_em, depto)) %>%
ggplot(data = ., aes(x = valor, y = sum_em, color = depto)) +
facet_wrap(~ variable, scales = "free") +
geom_point(alpha = 0.3) +
scale_x_log10() +
scale_y_log10() +
geom_smooth(method = "lm", se = FALSE) +
scale_color_jcolors(palette = "pal3") +
theme(legend.position = "bottom") +
labs(color = "")library(FactoMineR)
datos2 %>%
select(-c(em_agri:em_pec, depto)) %>%
mutate(area_fores = log10(area_fores+1),
area_cons = log10(area_cons+1),
area_gan = log10(area_gan+1),
total_pred = log10(total_pred),
total_bov = log10(total_bov),
area_agri = log10(area_agri),
sum_em = log10(sum_em))-> datos_acp
acp <- PCA(X = datos_acp, scale.unit = TRUE, graph = FALSE)
summary(acp)##
## Call:
## PCA(X = datos_acp, scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 3.098 1.132 0.948 0.731 0.573 0.325 0.195
## % of var. 44.255 16.173 13.539 10.440 8.179 4.636 2.779
## Cumulative % of var. 44.255 60.428 73.967 84.406 92.586 97.221 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3
## 1 | 3.175 | -2.648 0.719 0.696 | -0.022 0.000 0.000 | 0.458
## 2 | 3.254 | 2.562 0.673 0.620 | -0.375 0.039 0.013 | 0.722
## 3 | 1.456 | 0.610 0.038 0.175 | 0.540 0.082 0.137 | 0.828
## 4 | 1.537 | 0.457 0.021 0.088 | 0.076 0.002 0.002 | 1.431
## 5 | 2.577 | -2.325 0.554 0.814 | 0.172 0.008 0.004 | -0.038
## 6 | 3.687 | -2.613 0.700 0.502 | -1.376 0.531 0.139 | 1.728
## 7 | 1.457 | -0.177 0.003 0.015 | 0.137 0.005 0.009 | 1.206
## 8 | 2.166 | -0.925 0.088 0.182 | -0.410 0.047 0.036 | -0.514
## 9 | 1.989 | -1.402 0.201 0.497 | 0.787 0.174 0.157 | -0.967
## 10 | 1.652 | -0.617 0.039 0.139 | 0.902 0.228 0.298 | -0.420
## ctr cos2
## 1 0.070 0.021 |
## 2 0.175 0.049 |
## 3 0.229 0.323 |
## 4 0.686 0.866 |
## 5 0.000 0.000 |
## 6 1.000 0.220 |
## 7 0.487 0.686 |
## 8 0.089 0.056 |
## 9 0.313 0.236 |
## 10 0.059 0.065 |
##
## Variables
## Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr cos2
## total_pred | 0.657 13.954 0.432 | 0.259 5.928 0.067 | 0.540 30.768 0.292
## total_bov | 0.904 26.387 0.817 | 0.050 0.217 0.002 | -0.027 0.079 0.001
## area_agri | 0.715 16.519 0.512 | 0.015 0.019 0.000 | -0.338 12.046 0.114
## area_fores | 0.307 3.041 0.094 | 0.787 54.720 0.619 | 0.063 0.425 0.004
## area_cons | 0.403 5.247 0.163 | -0.538 25.608 0.290 | 0.613 39.714 0.376
## area_gan | 0.564 10.268 0.318 | -0.389 13.400 0.152 | -0.361 13.783 0.131
## sum_em | 0.873 24.584 0.762 | -0.035 0.108 0.001 | -0.174 3.185 0.030
##
## total_pred |
## total_bov |
## area_agri |
## area_fores |
## area_cons |
## area_gan |
## sum_em |
datos2$cp1 <- acp$ind$coord[,1]
datos2$cp2 <- acp$ind$coord[,2]
datos2$cp3 <- acp$ind$coord[,3]
library(factoextra)
library(ggpubr)
ggarrange(
datos2 %>%
ggplot(data = ., aes(x = cp1, y = cp2, color = depto)) +
geom_point() +
geom_vline(xintercept = 0, color = "red", lty = 2, size = 0.5) +
geom_hline(yintercept = 0, color = "red", lty = 2, size = 0.5) +
scale_color_jcolors(palette = "pal3") +
theme(legend.position = "bottom") +
labs(color = ""),
fviz_pca_var(acp, axes = c(1,2)),
ncol = 2
)ggarrange(
datos2 %>%
ggplot(data = ., aes(x = cp1, y = cp3, color = depto)) +
geom_point() +
geom_vline(xintercept = 0, color = "red", lty = 2, size = 0.5) +
geom_hline(yintercept = 0, color = "red", lty = 2, size = 0.5) +
scale_color_jcolors(palette = "pal3") +
theme(legend.position = "bottom") +
labs(color = ""),
fviz_pca_var(acp, axes = c(1,3)),
ncol = 2
)ggarrange(
datos2 %>%
ggplot(data = ., aes(x = cp2, y = cp3, color = depto)) +
geom_point() +
geom_vline(xintercept = 0, color = "red", lty = 2, size = 0.5) +
geom_hline(yintercept = 0, color = "red", lty = 2, size = 0.5) +
scale_color_jcolors(palette = "pal3") +
theme(legend.position = "bottom") +
labs(color = ""),
fviz_pca_var(acp, axes = c(2,3)),
ncol = 2
)library(plotly)
plot_ly(data = datos2, x = ~cp1, y = ~cp2, z = ~cp3, color = ~depto) %>%
add_markers()\[H_0: \mu_{Boyacá} = \mu_{Cundinamarca} = \mu_{Meta} = \mu_{Tolima} \\ H_1: \mu_i \neq \mu_j\]
modelo_anova1 <- aov(sum_em ~ depto, data = datos2)
summary(object = modelo_anova1)## Df Sum Sq Mean Sq F value Pr(>F)
## depto 3 8946782 2982261 32.16 <2e-16 ***
## Residuals 309 28652051 92725
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
modelo_anova2 <- aov(sum_em ~ depto, data = datos2 %>%
mutate(sum_em = log(sum_em)))
summary(object = modelo_anova2)## Df Sum Sq Mean Sq F value Pr(>F)
## depto 3 175 58.34 76.72 <2e-16 ***
## Residuals 309 235 0.76
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
par(mfrow = c(2,2))
plot(modelo_anova1)par(mfrow = c(2,2))
plot(modelo_anova2)TukeyHSD(x = modelo_anova1, conf.level = 0.95)## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = sum_em ~ depto, data = datos2)
##
## $depto
## diff lwr upr p adj
## Cundinamarca-Boyacá 26.25064 -75.97960 128.4809 0.9107847
## Meta-Boyacá 598.73355 436.23721 761.2299 0.0000000
## Tolima-Boyacá 47.73825 -87.29744 182.7739 0.7978420
## Meta-Cundinamarca 572.48291 409.03952 735.9263 0.0000000
## Tolima-Cundinamarca 21.48760 -114.68624 157.6614 0.9770957
## Tolima-Meta -550.99530 -736.72994 -365.2607 0.0000000
library(broom)
tidy(TukeyHSD(x = modelo_anova1, conf.level = 0.95)) %>%
ggplot(data = ., aes(x = reorder(comparison, estimate), y = estimate,
ymin = conf.low, ymax = conf.high)) +
geom_errorbar(width = 0.2) +
geom_point() +
geom_hline(yintercept = 0, lty =2, color = "red", size = 0.5) +
labs(x = "") +
coord_flip()